{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import yaml\n",
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the combined stories YAML file into Python objects.\n",
    "path = '../stories/all_stories.yml'\n",
    "with open(path, 'r', encoding='utf-8') as story_file:\n",
    "    # NOTE(review): yaml.Loader can construct arbitrary Python objects; keep it\n",
    "    # only for trusted files, otherwise consider yaml.safe_load.\n",
    "    dataset = yaml.load(story_file, Loader=yaml.Loader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def construct_dataset(dataset):\n",
    "    \"\"\"Collect the distinct intent, entity and action labels used in the stories.\n",
    "\n",
    "    Every element of ``dataset`` must provide a ``'steps'`` list of dicts. The\n",
    "    value stored under an ``'intent'`` or ``'action'`` key is recorded as a\n",
    "    label; for ``'entities'`` the keys of every dict in the list are recorded\n",
    "    (an empty or ``None`` entity list is skipped).\n",
    "\n",
    "    Returns:\n",
    "        tuple: ``(intents, entities, actions)`` as three sets, each of which\n",
    "        also contains the ``'PAD'`` placeholder.\n",
    "    \"\"\"\n",
    "    intents = {'PAD'}\n",
    "    entity_names = {'PAD'}\n",
    "    actions = {'PAD'}\n",
    "    for story in dataset:\n",
    "        for step in story['steps']:\n",
    "            for field, content in step.items():\n",
    "                if field == 'intent':\n",
    "                    intents.add(content)\n",
    "                elif field == 'action':\n",
    "                    actions.add(content)\n",
    "                elif field == 'entities' and content:\n",
    "                    # Only the entity names (dict keys) matter, not their values.\n",
    "                    for entity in content:\n",
    "                        entity_names.update(name for name, _ in entity.items())\n",
    "    return intents, entity_names, actions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "intent: {'Control-AC_Mode', 'PAD', 'Control-Humidifier_State', 'inform_mode', 'Control-RangHood_State', 'Control-Humidifier_Gear', 'affirm', 'Control-Fan_Gear', 'inform_Lamp', 'Control-Lamp_State', 'Control-AC_Wind', 'Control-RiceCooker_Timing', 'Control-Fan_Mode', 'Control-AC_Timing', 'whattodo', 'Control-Curtain_State', 'Control-Oven_Mode', 'inform_time', 'Control-Fan_Timing', 'Control-AC_State', 'Control-Humidifier_Timing', 'Control-Lamp_Mode', 'Control-Fan_Wind', 'Control-RiceCooker_State', 'Control-Fan_State', 'inform_sensorvalue', 'whoareyou', 'Control-Lamp_Lightness', 'Control-Oven_State', 'goodbye', 'Control-Lamp_Timing', 'inform_AC', 'Control-Oven_Temp', 'Control-AC_Temp', 'Control-Curtain_Timing', 'inform_range', 'greet', 'Control-RiceCooker_Mode', 'deny', 'Control-Lamp_Color', 'thanks', 'inform_address'} \n",
      " inetent_len : 42\n",
      "entities: {'gear_level', 'address', 'time', 'mode', 'PAD', 'operation', 'target', 'color', 'range', 'sensorvalue', 'device', 'date_time', 'temperature'} \n",
      " entities_len : 13\n",
      "action: {'Control-AC_Mode', 'PAD', 'Control-Humidifier_State', 'utter_answer_goodbye', 'Control-RangHood_State', 'Control-Humidifier_Gear', 'Control-Fan_Gear', 'Control-Lamp_State', 'Control-AC_Wind', 'utter_answer_deny', 'Control-RiceCooker_Timing', 'Control-Fan_Mode', 'Control-AC_Timing', 'utter_answer_whoareyou', 'Control-Curtain_State', 'Control-Oven_Mode', 'Control-Fan_Timing', 'Control-AC_State', 'Control-Humidifier_Timing', 'Control-Lamp_Mode', 'Control-Fan_Wind', 'Control-RiceCooker_State', 'Control-Fan_State', 'utter_answer_whattodo', 'Control-Lamp_Lightness', 'Control-Oven_State', 'Control-Lamp_Timing', 'Control-Oven_Temp', 'Control-AC_Temp', 'Control-Curtain_Timing', 'utter_answer_greet', 'Control-RiceCooker_Mode', 'utter_answer_thanks', 'Control-Lamp_Color', 'utter_answer_affirm'} \n",
      " action_len : 35\n"
     ]
    }
   ],
   "source": [
    "# Extract the label vocabularies, then report each one together with its size.\n",
    "intent, entities, action = construct_dataset(dataset)\n",
    "for template, labels in (\n",
    "    ('intent: {} \\n inetent_len : {}', intent),\n",
    "    ('entities: {} \\n entities_len : {}', entities),\n",
    "    ('action: {} \\n action_len : {}', action),\n",
    "):\n",
    "    print(template.format(labels, len(labels)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the intent-to-id and id-to-intent lookup tables.\n",
    "# `intent` is a set, and the iteration order of a set of strings changes between\n",
    "# interpreter runs (hash randomisation), so sort once to keep the ids reproducible.\n",
    "# NOTE(review): 'PAD' gets the id of its sorted position; pin it to 0 here if\n",
    "# downstream padding relies on that.\n",
    "intent_labels = sorted(intent, key=str)\n",
    "\n",
    "intent2id = {label: idx for idx, label in enumerate(intent_labels)}\n",
    "id2intent = dict(enumerate(intent_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the entity(slot)-to-id and id-to-entity lookup tables.\n",
    "# `entities` is a set, and the iteration order of a set of strings changes between\n",
    "# interpreter runs (hash randomisation), so sort once to keep the ids reproducible.\n",
    "# NOTE(review): 'PAD' gets the id of its sorted position; pin it to 0 here if\n",
    "# downstream padding relies on that.\n",
    "entity_labels = sorted(entities, key=str)\n",
    "\n",
    "entities2id = {label: idx for idx, label in enumerate(entity_labels)}\n",
    "id2entities = dict(enumerate(entity_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the action-to-id and id-to-action lookup tables.\n",
    "# `action` is a set, and the iteration order of a set of strings changes between\n",
    "# interpreter runs (hash randomisation), so sort once to keep the ids reproducible.\n",
    "# NOTE(review): 'PAD' gets the id of its sorted position; pin it to 0 here if\n",
    "# downstream padding relies on that.\n",
    "action_labels = sorted(action, key=str)\n",
    "\n",
    "action2id = {label: idx for idx, label in enumerate(action_labels)}\n",
    "id2action = dict(enumerate(action_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bundle every lookup table into one dict and persist it as a JSON file.\n",
    "# Note: json.dump writes the integer keys of the id2* tables as strings.\n",
    "char = {\n",
    "    'action2id': action2id,\n",
    "    'id2action': id2action,\n",
    "    'intent2id': intent2id,\n",
    "    'id2intent': id2intent,\n",
    "    'entities2id': entities2id,\n",
    "    'id2entities': id2entities,\n",
    "}\n",
    "\n",
    "with open('./DM_char.json', mode='w', encoding='utf-8') as out_file:\n",
    "    json.dump(char, out_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
